# "The Strength of Weak Ties in Shaping Attitudes toward Interfaith Marriage: 
# Muslim Migrants in Western Europe"
# https://doi.org/10.1016/j.ijintrel.2025.102288
# Tolga Tezcan

# Replication code
# 09/02/2025

# 1. SETUP ----
## 1.1. Packages ----
library(tidyverse)       # for data manipulation
library(sjmisc)          # for recoding variables
library(sjlabelled)      # for labeling variables
library(lme4)            # for mixed-effect logistic regression models
library(sjPlot)          # for creating tables of model outputs
library(lmtest)          # for Log-likelihood
library(performance)     # for checking the performance of models
library(emmeans)         # for contrast test
library(ggeffects)       # for computing marginal effects
library(gridExtra)       # for arranging multiple plots in a grid layout
library(margins)         # for computing average marginal effects
library(marginaleffects) # for computing average slops
library(boot)            # for bootstrapping
library(kableExtra)      # for tables

## 1.2. Loading data ----
# The data is in DANS Data Station Social Sciences and Humanities 
# at http://doi.org/10.17026/dans-xx7-5x27.
df <- haven::read_dta("EURISLAM Dataset Survey-data.dta")

## 1.3. Creating data frames ----
### 1.3.1. Creating a copy data frame (df_s)----
df_s <- df

### 1.3.2. Creating a data frame for Muslim participants (df_muslimpar)----
df_muslimpar <- df_s %>% 
  filter(red1==5)

### 1.3.3. Creating a data frame for Muslim migrants (df_muslim)----
df_muslimpar$groups <- rec(df_muslimpar$group,rec=
                             " 2=1 [Yugoslavian];
                      3=0 [Turkish];
                      4=2 [Moroccan];
                      5=3 [Pakistani];
                        1=NA", 
                           append = FALSE)
df_muslimpar$group <- NULL
df_muslimpar$groups <- to_factor(df_muslimpar$groups)
set_label(df_muslimpar$groups) <- "groups"
df_muslim<- df_muslimpar %>% 
  filter(groups==0 | groups==1 | groups==2| groups==3)

# 2. RECODING VARIABLES----
## 2.1. Dependent variable----
df_muslim$intermaryes <- rec(df_muslim$att3,rec=
                               "2=0 [unpleasant];
                      1,3=1 [pleasant];
                   77,88,NA=NA", 
                             append = FALSE)
df_muslim$intermaryes <- set_label(df_muslim$intermaryes, "intermarriage approval")
df_muslim$intermaryes <- to_factor(df_muslim$intermaryes)

## 2.2. Control variables----
### 2.2.1. Gender----
df_muslim$man <- rec(df_muslim$gender,rec=
                       "1=1 [man];
                      2=0 [woman]", 
                     append = FALSE)
df_muslim$man <- set_label(df_muslim$man, "Man")
df_muslim$man <- to_factor(df_muslim$man)

### 2.2.2. Age----
df_muslim <- df_muslim %>%
  mutate (age = 2011 - df_muslim$ori1)
df_muslim$age_cent <-scale(df_muslim$age, center = TRUE, scale = FALSE)
df_muslim$age_cent <- as.numeric(as.character(df_muslim$age_cent))

### 2.2.3. Marital status----
df_muslim$married <- ifelse(df_muslim$par1 == 1, 1, 0)
df_muslim$married <- set_label(df_muslim$married, "married")
df_muslim$married <- to_factor(df_muslim$married)

### 2.2.4. Generation----
df_muslim$generation <- 0
df_muslim$generation[df_muslim$ori3 <= 17] <- 1
df_muslim$generation[df_muslim$ori3 >= 18] <- 2
df_muslim$generation <- rec(df_muslim$generation,rec=
                              "0=2 [second generation];
                      1=1 [1.5 generation];
                      2=0 [first generation]", 
                            append = FALSE)
set_label(df_muslim$generation) <- "Generation"
df_muslim$generation <- to_factor(df_muslim$generation)
df_muslim$firstgen <- ifelse(df_muslim$generation == 0, 1, 0)
df_muslim$firstgen <- to_factor(df_muslim$firstgen)
df_muslim$onefivegen <- ifelse(df_muslim$generation == 1, 1, 0)
df_muslim$onefivegen <- to_factor(df_muslim$onefivegen)
df_muslim$secondgen <- ifelse(df_muslim$generation == 2, 1, 0)
df_muslim$secondgen <- to_factor(df_muslim$secondgen)

### 2.2.5. Education----
df_muslim$edulvl3_r <- rec(df_muslim$edulvl3,rec=
                             "else=copy;
                   4,77,88,NA=NA", 
                           append = FALSE)
df_muslim$primaryschool <- ifelse(df_muslim$edulvl3_r == 1, 1, 0)
df_muslim$primaryschool <- to_factor(df_muslim$primaryschool)
df_muslim$secondaryschool <- ifelse(df_muslim$edulvl3_r == 2, 1, 0)
df_muslim$secondaryschool <- to_factor(df_muslim$secondaryschool)
df_muslim$tertiaryschool <- ifelse(df_muslim$edulvl3_r == 3, 1, 0)
df_muslim$tertiaryschool <- to_factor(df_muslim$tertiaryschool)

### 2.2.6. Employment----
df_muslim$employed <- ifelse(df_muslim$emp1 == 1, 1, 0)
df_muslim$employed <- to_factor(df_muslim$employed)

### 2.2.7. Language proficiency----
df_muslim$lan4 <- remove_all_labels(df_muslim$lan4)
df_muslim$langprof <- rec(df_muslim$lan4,rec=
                            "1=5 [5];
                                  2=4 [4];
                                  3=3 [3];
                                  4=2 [2];
                                  5=1 [1];
                                  6=NA", 
                          append = FALSE)
df_muslim$langprof_cent <-scale(df_muslim$langprof, center = TRUE, scale = FALSE)
df_muslim$langprof_cent <- as.numeric(as.character(df_muslim$langprof_cent))

### 2.2.8. Perceived distance----
df_muslim$percdistance <- df_muslim$pda3
df_muslim$percdistance_cent <-scale(df_muslim$percdistance, center = TRUE, scale = FALSE)

### 2.2.9. Subjective religiosity----
df_muslim$subj_relig <- rec(df_muslim$rel4,rec=
                              "1=5 [very strongly];
                     2=4 [strongly];
                     3=3 [somewhat];
                     4=2 [hardly];
                     5=1 [not at all]",
                            append = FALSE)
set_label(df_muslim$subj_relig) <- "subjective religiosity"
df_muslim$subj_relig_cent <-scale(df_muslim$subj_relig, center = TRUE, scale = FALSE)

### 2.2.10. Individual religiosity----
df_muslim$indiv_relig <- rec(df_muslim$rel1,rec=
                               "1=5 [Several times a day];
                                  2=4 [Once a day];
                                  3=3 [Once or a few times a week];
                                  4=2 [Only on special occasions];
                                  5=1 [Never];
                                  6=NA", 
                             append = FALSE)
set_label(df_muslim$indiv_relig) <- "Individual religiosity"
df_muslim$indiv_relig_cent <-scale(df_muslim$indiv_relig, center = TRUE, scale = FALSE)

### 2.2.11. Communal religiosity----
df_muslim$com_relig <- rec(df_muslim$rel2,rec=
                             "1=4 [Daily];
                                  2=3 [Weekly];
                                  3=2 [Rarely];
                                  4=1 [Never];
                                  6=NA", 
                           append = FALSE)
set_label(df_muslim$com_relig) <- "Communal religiosity"
df_muslim$com_relig_cent <-scale(df_muslim$com_relig, center = TRUE, scale = FALSE)

### 2.2.12. Home identiy----
df_muslim$identityhome <- rec(df_muslim$id3,rec=
                                "1=5 [5];
                      2=4 [4];
                      3=3 [3];
                      4=2 [2];
                      5=1 [1]", 
                              append = FALSE)
set_label(df_muslim$identityhome) <- "identity: home"
df_muslim$identityhome_cent <-scale(df_muslim$identityhome, center = TRUE, scale = FALSE)
df_muslim$identityhome_cent <- as.numeric(as.character(df_muslim$identityhome_cent))

### 2.2.13. Host identiy----
df_muslim$identityhost <- rec(df_muslim$id1,rec=
                                "1=5 [5];
                      2=4 [4];
                      3=3 [3];
                      4=2 [2];
                      5=1 [1]", 
                              append = FALSE)
set_label(df_muslim$identityhost) <- "Identity: host"
df_muslim$identityhost_cent <-scale(df_muslim$identityhost, center = TRUE, scale = FALSE)
df_muslim$identityhost_cent <- as.numeric(as.character(df_muslim$identityhost_cent))

### 2.2.14. Strong ties----
df_muslim$strongties <- rec(df_muslim$son2,rec=
                              "1=5 [5];
                                  2=4 [4];
                                  3=3 [3];
                                  4=2 [2];
                                  5=1 [1];
                                  6=NA", 
                            append = FALSE)
set_label(df_muslim$strongties) <- "Strong Ties"
df_muslim$strongties_cent <-scale(df_muslim$strongties, center = TRUE, scale = FALSE)
df_muslim$strongties_cent <- as.numeric(as.character(df_muslim$strongties_cent))

### 2.2.15. Weak ties----
df_muslim$weakties <- rec(df_muslim$son1,rec=
                            "1=5 [5];
                                  2=4 [4];
                                  3=3 [3];
                                  4=2 [2];
                                  5=1 [1];
                                  6=NA", 
                          append = FALSE)
set_label(df_muslim$weakties) <- "Weak Ties"
df_muslim$weakties_cent <-scale(df_muslim$weakties, center = TRUE, scale = FALSE)
df_muslim$weakties_cent <- as.numeric(as.character(df_muslim$weakties_cent))

### 2.2.16. Interaction term: Strong ties x weak ties----
df_muslim$interaction_ties <- df_muslim$strongties_cent * df_muslim$weakties_cent

### 2.2.17. Countries----
df_muslim$country <- remove_all_labels(df_muslim$country)
df_muslim$countries <- rec(df_muslim$country,rec=
                             "1=1 [Belgium];
                      2=2 [Switzerland];
                      3=0 [Germany];
                      4=4 [France];
                      5=5 [United Kingdom];
                      6=6 [The Netherlands]", 
                           append = FALSE)
df_muslim$countries<- set_labels(df_muslim$countries, 
                                 labels = c("Belgium" = 1,
                                            "Switzerland" = 2,
                                            "Germany" = 0,
                                            "France" = 4,
                                            "United Kingdom" = 5,
                                            "The Netherlands" = 6))
df_muslim$countries <- to_factor(df_muslim$countries)

### 2.2.18. Groups and countries----
df_muslim$country_origin <- 0
df_muslim$country_origin[df_muslim$countries == 1 & df_muslim$groups == 0] <- 1 #Belgian-Turks
df_muslim$country_origin[df_muslim$countries == 1 & df_muslim$groups == 1] <- 2 #Belgian-Yugoslavs
df_muslim$country_origin[df_muslim$countries == 1 & df_muslim$groups == 2] <- 3 #Belgian-Moroccans
df_muslim$country_origin[df_muslim$countries == 1 & df_muslim$groups == 3] <- 4 #Belgian-Pakistanis

df_muslim$country_origin[df_muslim$countries == 0 & df_muslim$groups == 0] <- 5 #German-Turks
df_muslim$country_origin[df_muslim$countries == 0 & df_muslim$groups == 1] <- 6 #German-Yugoslavs
df_muslim$country_origin[df_muslim$countries == 0 & df_muslim$groups == 2] <- 7 #German-Moroccans
df_muslim$country_origin[df_muslim$countries == 0 & df_muslim$groups == 3] <- 8 #German-Pakistanis

df_muslim$country_origin[df_muslim$countries == 2 & df_muslim$groups == 0] <- 9 #Swiss-Turks
df_muslim$country_origin[df_muslim$countries == 2 & df_muslim$groups == 1] <- 10 #Swiss-Yugoslavs
df_muslim$country_origin[df_muslim$countries == 2 & df_muslim$groups == 2] <- 11 #Swiss-Moroccans
df_muslim$country_origin[df_muslim$countries == 2 & df_muslim$groups == 3] <- 12 #Swiss-Pakistanis

df_muslim$country_origin[df_muslim$countries == 4 & df_muslim$groups == 0] <- 13 #French-Turks
df_muslim$country_origin[df_muslim$countries == 4 & df_muslim$groups == 1] <- 14 #French-Yugoslavs
df_muslim$country_origin[df_muslim$countries == 4 & df_muslim$groups == 2] <- 15 #French-Moroccans
df_muslim$country_origin[df_muslim$countries == 4 & df_muslim$groups == 3] <- 16 #French-Pakistanis

df_muslim$country_origin[df_muslim$countries == 5 & df_muslim$groups == 0] <- 17 #British-Turks
df_muslim$country_origin[df_muslim$countries == 5 & df_muslim$groups == 1] <- 18 #British-Yugoslavs
df_muslim$country_origin[df_muslim$countries == 5 & df_muslim$groups == 2] <- 19 #British-Moroccans
df_muslim$country_origin[df_muslim$countries == 5 & df_muslim$groups == 3] <- 20 #British-Pakistanis

df_muslim$country_origin[df_muslim$countries == 6 & df_muslim$groups == 0] <- 21 #Dutch-Turks
df_muslim$country_origin[df_muslim$countries == 6 & df_muslim$groups == 1] <- 22 #Dutch-Yugoslavs
df_muslim$country_origin[df_muslim$countries == 6 & df_muslim$groups == 2] <- 23 #Dutch-Moroccans
df_muslim$country_origin[df_muslim$countries == 6 & df_muslim$groups == 3] <- 24 #Dutch-Pakistanis

df_muslim$country_origin <- set_labels(df_muslim$country_origin,
                                       labels = c("Turks (Belgium)" = 1,
                                                  "Ex-Yugoslavs (Belgium)" = 2,
                                                  "Moroccans (Belgium)" = 3,
                                                  "Pakistanis (Belgium)" = 4,
                                                  "Turks (Germany)" = 5,
                                                  "Ex-Yugoslavs (Germany)" = 6,
                                                  "Moroccans (Germany)" = 7,
                                                  "Pakistanis (Germany)" = 8,
                                                  "Turks (Switzerland)" = 9,
                                                  "Ex-Yugoslavs (Switzerland)" = 10,
                                                  "Moroccans (Switzerland)" = 11,
                                                  "Pakistanis (Switzerland)" = 12,
                                                  "Turks (France)" = 13,
                                                  "Ex-Yugoslavs (France)" = 14,
                                                  "Moroccans (France)" = 15,
                                                  "Pakistanis (France)" = 16,
                                                  "Turks (the UK)" = 17,
                                                  "Ex-Yugoslavs (the UK)" = 18,
                                                  "Moroccans (the UK)" = 19,
                                                  "Pakistanis (the UK)" = 20,
                                                  "Turks (the Netherlands)" = 21,
                                                  "Ex-Yugoslavs (the Netherlands)" = 22,
                                                  "Moroccans (the Netherlands)" = 23,
                                                  "Pakistanis (the Netherlands)" = 24))
df_muslim$country_origin <- to_factor(df_muslim$country_origin)

# 3. MODELS----
## 3.1. Mixed-effects logistic regression models----
### 3.1.1 Odd-ratios----
glmfit1 <- glmer(intermaryes ~ man + age_cent + married + (1 | country_origin) + 
                   onefivegen + secondgen + secondaryschool + tertiaryschool + employed,
                 data = df_muslim, 
                 family = binomial(link="logit"), 
                 glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 10000)))
glmfit2 <- glmer(intermaryes ~ man + age_cent +  married + (1 | country_origin) + 
                   onefivegen + secondgen + secondaryschool + tertiaryschool + employed +
                   langprof_cent + percdistance_cent +
                   subj_relig_cent + indiv_relig_cent + com_relig_cent + 
                   identityhome_cent + identityhost_cent,
                 data = df_muslim, 
                 family = binomial(link="logit"), 
                 glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 10000)))
glmfit3 <- glmer(intermaryes ~ man + age_cent +  married + (1 | country_origin) + 
                   onefivegen + secondgen + secondaryschool + tertiaryschool + employed + 
                   langprof_cent + percdistance_cent +
                   subj_relig_cent + indiv_relig_cent + com_relig_cent  + 
                   identityhome_cent + identityhost_cent +
                   strongties_cent + weakties_cent,
                 data = df_muslim, 
                 family = binomial(link="logit"), 
                 glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 10000)))
glmfit4 <- glmer(intermaryes ~ man + age_cent +  married + (1 | country_origin) + 
                   onefivegen + secondgen + secondaryschool + tertiaryschool + employed +
                   langprof_cent + percdistance_cent +
                   subj_relig_cent + indiv_relig_cent + com_relig_cent + 
                   identityhome_cent + identityhost_cent +
                   strongties_cent*weakties_cent,
                 data = df_muslim, 
                 family = binomial(link="logit"), 
                 glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 10000)))
tab_model(glmfit1,glmfit2,glmfit3,glmfit4,
          collapse.ci = T, 
          show.p = T, p.style = "numeric_stars", p.threshold = c(0.05, 0.01, 0.001), show.loglik = TRUE, 
          show.ci = 0.95, show.intercept = TRUE,
          show.aic = T, show.icc = T, show.dev = FALSE, show.se = T, show.obs = TRUE)

### 3.1.2 Average marginal effects (AMEs)----
ame_glmfit1<- margins(glmfit1)
summary(ame_glmfit1)
ame_glmfit2<- margins(glmfit2)
summary(ame_glmfit2)
ame_glmfit3<- margins(glmfit3)
summary(ame_glmfit3)
ame_glmfit4<- margins(glmfit4)
summary(ame_glmfit4)

summary(glmfit4)


## 3.2. Checking the performance of models----
### 3.2.1. Log-Likelihood----
lrtest(glmfit3, glmfit4)

### 3.2.2. Multicollinearity (VIF)----
check_collinearity(glmfit4)

### 3.2.3. ICC----
var_random <- as.numeric(VarCorr(glmfit4)$country_origin[1])
var_residual <- pi^2 / 3
icc <- var_random / (var_random + var_residual)
print(icc)

### 3.2.4. BIC----
model_bic1 <- BIC(glmfit1)
print(model_bic1)
model_bic2 <- BIC(glmfit2)
print(model_bic2)
model_bic3 <- BIC(glmfit3)
print(model_bic3)
model_bic4 <- BIC(glmfit4)
print(model_bic4)


### 3.2.5. Likelihood-ratio test----
# Pooled logit model with no random effects
glmfit4a  <- glm(
  intermaryes ~ man + age_cent + married +
    onefivegen + secondgen +
    secondaryschool + tertiaryschool + employed +
    langprof_cent + percdistance_cent +
    subj_relig_cent + indiv_relig_cent + com_relig_cent +
    identityhome_cent + identityhost_cent +
    strongties_cent*weakties_cent,
  data   = df_muslim,
  family = binomial(link = "logit")
)

# Cross-classified random-effects model 
glmfit4b <- glmer(
  intermaryes ~ man + age_cent + married +
    onefivegen + secondgen +
    secondaryschool + tertiaryschool + employed +
    langprof_cent + percdistance_cent +
    subj_relig_cent + indiv_relig_cent + com_relig_cent +
    identityhome_cent + identityhost_cent +
    strongties_cent*weakties_cent +
    (1 | countries) +          # HOST (6)
    (1 | groups),              # ORIGIN (4)
  data    = df_muslim,
  family  = binomial(link = "logit"),
  control = glmerControl(optimizer = "bobyqa",
                         optCtrl   = list(maxfun = 10000))
)

LL_glmfit4 <- logLik(glmfit4)
LL_glmfit4a  <- logLik(glmfit4a)

chisq <- 2 * (LL_glmfit4 - LL_glmfit4a)       
df    <- attr(LL_glmfit4, "df") - attr(LL_glmfit4a, "df")
pval  <- pchisq(chisq, df = df, lower.tail = FALSE)

cat(sprintf(
  "Δχ² = %.2f (df = %d), p = %.3g", chisq, df, pval
))

AIC(glmfit4, glmfit4a, glmfit4b)
BIC(glmfit4, glmfit4a, glmfit4b)

## 3.3. The comparison of effect sizes (H1)----
AME_strong <- 0.0161
AME_weak <- 0.0215
se_strong <- 0.0071
se_weak <- 0.0069
diff_AME <- AME_weak - AME_strong
se_diff_AME <- sqrt(se_strong^2 + se_weak^2)
z_score_AME <- diff_AME / se_diff_AME
p_value_AME <- 2 * pnorm(-abs(z_score_AME))
p_value_AME

## 3.4. Contrast test (H2)----
emmeans_glmfit4 <- emmeans(glmfit4, specs = ~ strongties_cent * weakties_cent)
levels_to_compare <- list(
  `21% gap strong` = list(strongties_cent = 2.15, weakties_cent = -2.37),
  `21% gap weak` = list(strongties_cent = -1.85, weakties_cent = 1.63),
  `13% gap strong` = list(strongties_cent = 2.15, weakties_cent = 1.63),
  `13% gap weak` = list(strongties_cent = -1.85, weakties_cent = -2.37)
)
contrast_test <- emmeans_glmfit4 %>% 
  contrast(method = "revpairwise", levels_to_compare)
summary(contrast_test)

## 3.5. Marginal effects----
ggpredict(glmfit4, c("weakties_cent", "strongties_cent"))
ggpredict(glmfit4, c("strongties_cent", "weakties_cent"))

# 4. FIGURES----
## 4.1. Figure 1---- 
# Predicted Probabilities of Interfaith Marriage Approval by Strong and Weak Ties
set_theme(
  base = theme_bw(),
  title.align = "center",
  axis.title.size = 1.4,
  geom.linetype =3,
  geom.outline.color = "black", 
  geom.outline.size = 2, 
  geom.label.size = 2,
  geom.label.color = "black",
  title.color = "black", 
  title.size = 1.5, 
  axis.angle.x = 0, 
  axis.textcolor = "black",
)

pp1a <- plot_model(glmfit4, type="pred", line.size=1.5, axis.lim=c(.60, .90), 
                   grid.breaks = 6, colors ="bw",title = "(A) Strong ties X Weak ties", show.legend = F, ci.lvl = NA,
                   axis.title = "Interfaith marriage approval", 
                   axis.labels =T, terms = c( "weakties_cent", "strongties_cent[-1.85, 2.15]")) +
  scale_x_continuous(breaks = c(-2.37, -1.37, -0.37, 0.63, 1.63),
                     label = c("Up to 1", "Minority", "Half", "Majority", "All"),
                     labs(title="Interfaith marriage approval", y="Weak ties")) 

pp1b <- plot_model(glmfit4, type="pred", line.size=1.5, axis.lim=c(.60, .90), 
                   grid.breaks = 6, colors ="bw",title = "(B) Weak ties X Strong ties", show.legend = F, ci.lvl = NA,
                   axis.title = "Interfaith marriage approval", 
                   axis.labels =T, terms = c( "strongties_cent", "weakties_cent[-2.37, 1.63]")) +
  scale_x_continuous(breaks = c(-1.85, -0.85, 0.15, 1.15, 2.15),
                     label = c("Up to 1", "Minority", "Half", "Majority", "All"),
                     labs(title="Interfaith marriage approval", y="Strong ties")) 

grid.arrange(pp1a, pp1b,
             ncol = 2)

## 4.2. Figure 2----
# Predicted Probabilities of Interfaith Marriage Approval by Migrant Groups, 
# Host Countries, and Host Countries x Migrant Groups
groups_order    <- c("0","1","2","3")   # Turks=0, Ex-Yugoslavs=1, Moroccans=2, Pakistanis=3
groups_labels   <- c("Turks","Ex-Yugoslavs","Moroccans","Pakistanis")
names(groups_labels) <- groups_order

countries_order  <- c("1","0","2","4","5","6")  # BE=1, DE=0, CH=2, FR=4, GB=5, NL=6
countries_labels <- c("BE","DE","CH","FR","GB","NL")
names(countries_labels) <- countries_order

df_muslim <- df_muslim %>%
  mutate(
    groups    = factor(groups),
    countries = factor(countries),
    man = factor(man), married = factor(married),
    onefivegen = factor(onefivegen), secondgen = factor(secondgen),
    secondaryschool = factor(secondaryschool), tertiaryschool = factor(tertiaryschool),
    employed = factor(employed)
  ) %>%
  droplevels()

# Model A: Groups only (random intercept for countries)
glmfit_groups <- glmer(
  intermaryes ~ man + age_cent + married +
    onefivegen + secondgen + secondaryschool + tertiaryschool + employed +
    langprof_cent + percdistance_cent +
    subj_relig_cent + indiv_relig_cent + com_relig_cent +
    identityhome_cent + identityhost_cent +
    strongties_cent*weakties_cent +
    groups + (1 | countries),
  data = df_muslim,
  family = binomial(link="logit"),
  glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=10000))
)

pred_groups <- expand.grid(
  man = factor(1, levels = levels(df_muslim$man)),
  age_cent = mean(df_muslim$age_cent, na.rm=TRUE),
  married = factor(1, levels = levels(df_muslim$married)),
  onefivegen = factor(1, levels = levels(df_muslim$onefivegen)),
  secondgen = factor(1, levels = levels(df_muslim$secondgen)),
  secondaryschool = factor(1, levels = levels(df_muslim$secondaryschool)),
  tertiaryschool = factor(1, levels = levels(df_muslim$tertiaryschool)),
  employed = factor(1, levels = levels(df_muslim$employed)),
  countries = factor(1, levels = levels(df_muslim$countries)),
  langprof_cent = mean(df_muslim$langprof_cent, na.rm=TRUE),
  subj_relig_cent = mean(df_muslim$subj_relig_cent, na.rm=TRUE),
  indiv_relig_cent = mean(df_muslim$indiv_relig_cent, na.rm=TRUE),
  com_relig_cent = mean(df_muslim$com_relig_cent, na.rm=TRUE),
  percdistance_cent = mean(df_muslim$percdistance_cent, na.rm=TRUE),
  identityhome_cent = mean(df_muslim$identityhome_cent, na.rm=TRUE),
  identityhost_cent = mean(df_muslim$identityhost_cent, na.rm=TRUE),
  strongties_cent = mean(df_muslim$strongties_cent, na.rm=TRUE),
  weakties_cent   = mean(df_muslim$weakties_cent,   na.rm=TRUE),
  groups = levels(df_muslim$groups)
)
pred_groups$predicted_prob <- predict(glmfit_groups, newdata=pred_groups, type="response")

# Model B: Countries only (random intercept for groups)
glmfit_countries <- glmer(
  intermaryes ~ man + age_cent + married +
    onefivegen + secondgen + secondaryschool + tertiaryschool + employed +
    langprof_cent + percdistance_cent +
    subj_relig_cent + indiv_relig_cent + com_relig_cent +
    identityhome_cent + identityhost_cent +
    strongties_cent*weakties_cent +
    countries + (1 | groups),
  data = df_muslim,
  family = binomial(link="logit"),
  glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=10000))
)

pred_countries <- expand.grid(
  man = factor(1, levels = levels(df_muslim$man)),
  age_cent = mean(df_muslim$age_cent, na.rm=TRUE),
  married = factor(1, levels = levels(df_muslim$married)),
  onefivegen = factor(1, levels = levels(df_muslim$onefivegen)),
  secondgen = factor(1, levels = levels(df_muslim$secondgen)),
  secondaryschool = factor(1, levels = levels(df_muslim$secondaryschool)),
  tertiaryschool = factor(1, levels = levels(df_muslim$tertiaryschool)),
  employed = factor(1, levels = levels(df_muslim$employed)),
  groups = factor(1, levels = levels(df_muslim$groups)),
  langprof_cent = mean(df_muslim$langprof_cent, na.rm=TRUE),
  subj_relig_cent = mean(df_muslim$subj_relig_cent, na.rm=TRUE),
  indiv_relig_cent = mean(df_muslim$indiv_relig_cent, na.rm=TRUE),
  com_relig_cent = mean(df_muslim$com_relig_cent, na.rm=TRUE),
  percdistance_cent = mean(df_muslim$percdistance_cent, na.rm=TRUE),
  identityhome_cent = mean(df_muslim$identityhome_cent, na.rm=TRUE),
  identityhost_cent = mean(df_muslim$identityhost_cent, na.rm=TRUE),
  strongties_cent = mean(df_muslim$strongties_cent, na.rm=TRUE),
  weakties_cent   = mean(df_muslim$weakties_cent,   na.rm=TRUE),
  countries = levels(df_muslim$countries)
)
pred_countries$predicted_prob <- predict(glmfit_countries, newdata=pred_countries, type="response")

# Model C for glmfit4 (with 1|country_origin)
pred_data <- expand.grid(
  man = factor(1, levels = levels(df_muslim$man)),
  age_cent = mean(df_muslim$age_cent, na.rm = TRUE),
  married = factor(1, levels = levels(df_muslim$married)),
  onefivegen = factor(1, levels = levels(df_muslim$onefivegen)),
  secondgen = factor(1, levels = levels(df_muslim$secondgen)),
  secondaryschool = factor(1, levels = levels(df_muslim$secondaryschool)),
  tertiaryschool = factor(1, levels = levels(df_muslim$tertiaryschool)),
  employed = factor(1, levels = levels(df_muslim$employed)),
  langprof_cent = mean(df_muslim$langprof_cent, na.rm = TRUE),
  subj_relig_cent = mean(df_muslim$subj_relig_cent, na.rm = TRUE),
  indiv_relig_cent = mean(df_muslim$indiv_relig_cent, na.rm = TRUE),
  com_relig_cent = mean(df_muslim$com_relig_cent, na.rm = TRUE),
  percdistance_cent = mean(df_muslim$percdistance_cent, na.rm = TRUE),
  identityhome_cent = mean(df_muslim$identityhome_cent, na.rm = TRUE),
  identityhost_cent = mean(df_muslim$identityhost_cent, na.rm = TRUE),
  strongties_cent = mean(df_muslim$strongties_cent, na.rm = TRUE),
  weakties_cent = mean(df_muslim$weakties_cent, na.rm = TRUE),
  country_origin = levels(df_muslim$country_origin)
)
pred_data$predicted_prob <- predict(glmfit4, newdata = pred_data, type = "response")

labels_vector <- c(
  "Turks (BE)"=1, "Ex-Yugoslavs (BE)"=2, "Moroccans (BE)"=3, "Pakistanis (BE)"=4,
  "Turks (DE)"=5, "Ex-Yugoslavs (DE)"=6, "Moroccans (DE)"=7, "Pakistanis (DE)"=8,
  "Turks (CH)"=9, "Ex-Yugoslavs (CH)"=10, "Moroccans (CH)"=11, "Pakistanis (CH)"=12,
  "Turks (FR)"=13, "Ex-Yugoslavs (FR)"=14, "Moroccans (FR)"=15, "Pakistanis (FR)"=16,
  "Turks (GB)"=17, "Ex-Yugoslavs (GB)"=18, "Moroccans (GB)"=19, "Pakistanis (GB)"=20,
  "Turks (NL)"=21, "Ex-Yugoslavs (NL)"=22, "Moroccans (NL)"=23, "Pakistanis (NL)"=24
)

df_groups <- tibble(
  block = "Groups",
  code  = groups_order,
  item  = groups_labels[code],
  p     = pred_groups$predicted_prob[match(code, as.character(levels(df_muslim$groups)))]
)

df_countries <- tibble(
  block = "Countries",
  code  = countries_order,
  item  = countries_labels[code],
  p     = pred_countries$predicted_prob[match(code, as.character(levels(df_muslim$countries)))]
)

combo_labels <- c(
  "Turks (BE)","Ex-Yugoslavs (BE)","Moroccans (BE)","Pakistanis (BE)",
  "Turks (DE)","Ex-Yugoslavs (DE)","Moroccans (DE)","Pakistanis (DE)",
  "Turks (CH)","Ex-Yugoslavs (CH)","Moroccans (CH)","Pakistanis (CH)",
  "Turks (FR)","Ex-Yugoslavs (FR)","Moroccans (FR)","Pakistanis (FR)",
  "Turks (GB)","Ex-Yugoslavs (GB)","Moroccans (GB)","Pakistanis (GB)",
  "Turks (NL)","Ex-Yugoslavs (NL)","Moroccans (NL)","Pakistanis (NL)"
)

df_combos <- tibble(
  block = "Groups × Countries",
  item  = combo_labels,
  p     = pred_data$predicted_prob
)

x_levels <- c(groups_labels[groups_order],
              countries_labels[countries_order],
              combo_labels)

plot_df <- bind_rows(
  select(df_groups, item, p),
  select(df_countries, item, p),
  select(df_combos, item, p)
) %>%
  mutate(item = factor(item, levels = x_levels))

sep1 <- length(groups_order) + 0.5
sep2 <- length(groups_order) + length(countries_order) + 0.5

ggplot(plot_df, aes(x = item, y = p)) +
  geom_point(size = 3) +
  geom_text(aes(label = sprintf("%.2f", p)), vjust = -0.6, size = 3.5) +
  scale_y_continuous(limits = c(0.70, 0.90), expand = expansion(mult = c(0, 0.05))) +
  labs(x = NULL, y = "Interfaith marriage approval") +
  theme_minimal(base_size = 14) +
  theme(axis.text.x = element_text(angle = 40, hjust = 1),
        axis.title.y = element_text(face = "bold"))

# 5. SENSITIVITY ANALYSES----
## 5.1. Bootstrapping----
ame_stat <- function(data, i, clusters) {
  resampled <- clusters[i] 
  dat_boot  <- data %>% filter(country_origin %in% resampled)
  
  fit <- glmer(
    intermaryes ~ man + age_cent + married + 
      onefivegen + secondgen + secondaryschool + tertiaryschool + employed +
      langprof_cent + percdistance_cent +
      subj_relig_cent + indiv_relig_cent + com_relig_cent +
      strongties_cent + weakties_cent +
      interaction_ties +
      (1 | country_origin),
    data    = dat_boot,
    family  = binomial,
    control = glmerControl(optimizer = "bobyqa")
  )
  
  avg_slopes(
    fit,
    variables = c("strongties_cent", "weakties_cent", "interaction_ties"),
    vcov      = FALSE,
    re.form   = NA
  )$estimate
}

set.seed(123)
cluster_ids <- unique(df_muslim$country_origin)

boot_np <- boot(
  data      = df_muslim,
  statistic = ame_stat,
  R         = 1000,
  clusters  = cluster_ids,
  parallel  = "multicore",
  ncpus     = parallel::detectCores() - 1
)

colnames(boot_np$t) <- c("AME_strong", "AME_weak", "AME_interaction")

ame_point <- ame_stat(df_muslim, 1:nrow(df_muslim), cluster_ids)
boot_se <- apply(boot_np$t, 2, sd)
boot_ci <- t(apply(boot_np$t, 2, quantile, probs = c(.025, .975)))


z_val <- ame_point / boot_se
p_val <- 2 * pnorm(-abs(z_val))
sig <- ifelse(p_val < .001, "***",
              ifelse(p_val < .01,  "**",
                     ifelse(p_val < .05,  "*",
                            ifelse(p_val < .10,  "†", ""))))
result <- data.frame(
  AME  = round(ame_point, 4),
  SE   = round(boot_se, 4),
  L95  = round(boot_ci[, 1], 4),
  U95  = round(boot_ci[, 2], 4),
  p    = signif(p_val, 3),
  sig  = sig,
  row.names = c("Strong ties", "Weak ties", "Interaction")
)
print(result, right = FALSE)

## 5.2. Non-Linear Model ----
df_muslim <- df_muslim %>%
  mutate(
    strongties_sq = strongties_cent^2,
    weakties_sq = weakties_cent^2
  )

glmfit4_nonlinear <- glmer(intermaryes ~ man + age_cent +  married + (1 | country_origin) + 
                             onefivegen + secondgen + secondaryschool + tertiaryschool + employed +
                             langprof_cent + percdistance_cent +
                             subj_relig_cent + indiv_relig_cent + com_relig_cent + 
                             identityhome_cent + identityhost_cent +
                             strongties_cent*weakties_cent +
                             strongties_sq + weakties_sq,
                           data = df_muslim, 
                           family = binomial(link="logit"), 
                           glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 10000)))
summary(glmfit4_nonlinear)


# 6. HETEROGENITY ANALYSES----
## 6.1. Pairwise Comparisons of Predicted Probabilities of Interfaith Marriage Approval----
# Model 1: Migrant Groups as Fixed Effect
glmfit_groups <- glmer(
  intermaryes ~ man + age_cent + married +
    onefivegen + secondgen + secondaryschool + tertiaryschool + employed +
    langprof_cent + percdistance_cent +
    subj_relig_cent + indiv_relig_cent + com_relig_cent +
    identityhome_cent + identityhost_cent +
    strongties_cent*weakties_cent +
    groups + (1 | countries),
  data = df_muslim, family = binomial(link="logit"),
  glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=10000))
)

# Model 2: Host Countries as Fixed Effect
glmfit_countries <- glmer(
  intermaryes ~ man + age_cent + married +
    onefivegen + secondgen + secondaryschool + tertiaryschool + employed +
    langprof_cent + percdistance_cent +
    subj_relig_cent + indiv_relig_cent + com_relig_cent +
    identityhome_cent + identityhost_cent +
    strongties_cent*weakties_cent +
    countries + (1 | groups),
  data = df_muslim, family = binomial(link="logit"),
  glmerControl(optimizer="bobyqa", optCtrl=list(maxfun=10000))
)

# Model 3: Groups within Countries (Fixed Effects Interaction)
glmfit_fixed <- glmer(intermaryes ~ man + age_cent +  married + 
                        onefivegen + secondgen + secondaryschool + tertiaryschool + employed +
                        langprof_cent + percdistance_cent +
                        subj_relig_cent + indiv_relig_cent + com_relig_cent + 
                        identityhome_cent + identityhost_cent +
                        strongties_cent*weakties_cent +
                        groups * countries +
                        (1 | country_origin),
                      data = df_muslim, 
                      family = binomial(link="logit"), 
                      glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 10000)))

pairs_groups <- pairs(emmeans(glmfit_groups, specs = ~ groups, type = "response"))
pairs_countries <- pairs(emmeans(glmfit_countries, specs = ~ countries, type = "response"))
pairs_groups_by_country <- pairs(emmeans(glmfit_fixed, specs = ~ groups | countries, type = "response"))


stars <- function(p) {
  ifelse(is.na(p), "",
         ifelse(p < .001, "***",
                ifelse(p < .01,  "**",
                       ifelse(p < .05,  "*",
                              ifelse(p < .10,  "†", "")))))
}

# Process Test 1: Across Migrant Groups
table_groups <- as_tibble(pairs_groups) %>%
  filter(p.value < 0.10) %>% # Keep only significant results
  mutate(Result = sprintf("%.2f (%.2f)%s", odds.ratio, SE, stars(p.value)),
         Analysis = "A: Across Migrant Groups") %>%
  select(Analysis, Contrast = contrast, Result)

# Process Test 2: Across Host Countries
table_countries <- as_tibble(pairs_countries) %>%
  filter(p.value < 0.10) %>% # Keep only significant results
  mutate(Result = sprintf("%.2f (%.2f)%s", odds.ratio, SE, stars(p.value)),
         Analysis = "B: Across Host Countries") %>%
  select(Analysis, Contrast = contrast, Result)

# Process Test 3: Groups within each Country
table_groups_in_countries <- as_tibble(pairs_groups_by_country) %>%
  filter(p.value < 0.10) %>% # Keep only significant results
  mutate(Result = sprintf("%.2f (%.2f)%s", odds.ratio, SE, stars(p.value)),
         Analysis = paste0("C: Groups within ", countries)) %>%
  select(Analysis, Contrast = contrast, Result)

final_appendix_table <- bind_rows(
  table_groups,
  table_countries,
  table_groups_in_countries
)

if (nrow(final_appendix_table) > 0) {
  kbl(final_appendix_table,
      align = c("l", "l", "r"),
      caption = "Significant Pairwise Comparisons of Predicted Probabilities",
      booktabs = TRUE) %>%
    pack_rows(index = table(final_appendix_table$Analysis)) %>%
    kable_styling(bootstrap_options = "striped", font_size = 10, full_width = FALSE)
} else {
  cat("No statistically significant (p < .10) pairwise comparisons were found.")
}

## 6.2. Across-Context Heterogeneity of Tie Effects----
mods_bin  <- c("man","married","onefivegen","secondgen","secondaryschool","tertiaryschool","employed")
df_muslim <- df_muslim %>%
  mutate(across(all_of(c("groups", "countries", "country_origin", mods_bin)), factor))

orig_levels <- c("Turkish","Ex-Yugoslavian","Moroccan","Pakistani")
if (identical(levels(df_muslim$groups), as.character(0:3))) {
  levels(df_muslim$groups) <- orig_levels
}

cty_map <- c(`0`="Germany", `1`="Belgium", `2`="Switzerland",
             `4`="France", `5`="United Kingdom", `6`="The Netherlands")
if (all(names(cty_map) %in% levels(df_muslim$countries))) {
  levels(df_muslim$countries) <- unname(cty_map[levels(df_muslim$countries)])
}

covariates_base <-
  "man + age_cent + married + onefivegen + secondgen + secondaryschool + 
   tertiaryschool + employed + langprof_cent + percdistance_cent +
   subj_relig_cent + indiv_relig_cent + com_relig_cent +
   identityhome_cent + identityhost_cent"

fmla_cty <- as.formula(paste("intermaryes ~", covariates_base, "+ weakties_cent*countries + strongties_cent*countries + groups"))
fit_cty <- glm(fmla_cty, data = df_muslim, family = binomial())
V_cty <- sandwich::vcovCL(fit_cty, cluster = ~ country_origin, type = "HC1")
slopes_w_cty <- emmeans::emtrends(fit_cty, ~ countries, var = "weakties_cent", vcov. = V_cty)
slopes_s_cty <- emmeans::emtrends(fit_cty, ~ countries, var = "strongties_cent", vcov. = V_cty)

fmla_grp <- as.formula(paste("intermaryes ~", covariates_base, "+ weakties_cent*groups + strongties_cent*groups + countries"))
fit_grp <- glm(fmla_grp, data = df_muslim, family = binomial())
V_grp <- sandwich::vcovCL(fit_grp, cluster = ~ country_origin, type = "HC1")
slopes_w_grp <- emmeans::emtrends(fit_grp, ~ groups, var = "weakties_cent", vcov. = V_grp)
slopes_s_grp <- emmeans::emtrends(fit_grp, ~ groups, var = "strongties_cent", vcov. = V_grp)

stars <- function(p) {
  ifelse(is.na(p), "",
         ifelse(p < .001, "***",
                ifelse(p < .01,  "**",
                       ifelse(p < .05,  "*",
                              ifelse(p < .10,  "†", "")))))
}

process_and_format_results <- function(emm_summary, analysis_label) {
  df <- as_tibble(summary(emm_summary, infer = TRUE))
  df %>%
    mutate(
      `OR (SE)` = sprintf("%.2f (%.2f)%s", exp(.[[2]]), SE, stars(p.value)),
      Analysis = analysis_label
    ) %>%
    rename(Context = 1) %>%
    select(Analysis, Context, `OR (SE)`)
}

table_w_grp <- process_and_format_results(slopes_w_grp, "Across Migrant Groups: Weak Ties")
table_s_grp <- process_and_format_results(slopes_s_grp, "Across Migrant Groups: Strong Ties")
table_w_cty <- process_and_format_results(slopes_w_cty, "Across Countries: Weak Ties")
table_s_cty <- process_and_format_results(slopes_s_cty, "Across Countries: Strong Ties")

analysis_order <- c(
  "Across Migrant Groups: Weak Ties",
  "Across Migrant Groups: Strong Ties",
  "Across Countries: Weak Ties",
  "Across Countries: Strong Ties"
)

final_appendix_table <- bind_rows(
  table_w_grp,
  table_s_grp,
  table_w_cty,
  table_s_cty
) %>%
  mutate(Analysis = factor(Analysis, levels = analysis_order)) %>%
  arrange(Analysis)

kbl(final_appendix_table,
    align = c("l", "l", "r"),
    caption = "Across-Context Heterogeneity of Tie Effects",
    booktabs = TRUE) %>%
  pack_rows(index = table(final_appendix_table$Analysis)) %>%
  kable_styling(bootstrap_options = "striped", font_size = 10, full_width = FALSE)

## 6.3. Within-Origin and Within-Country Heterogeneity of Tie Effects (Omnibus Tests)----
options(contrasts = c("contr.treatment","contr.poly"))
mods_bin  <- c("man","married","onefivegen","secondgen","secondaryschool","tertiaryschool","employed")
mods_cont <- c("age_cent","langprof_cent","percdistance_cent",
               "subj_relig_cent","indiv_relig_cent","com_relig_cent",
               "identityhome_cent","identityhost_cent")
mods_all  <- c(mods_bin, mods_cont)

fixed_base <-
  "man + age_cent + married +
   onefivegen + secondgen + secondaryschool + tertiaryschool + employed +
   langprof_cent + percdistance_cent +
   subj_relig_cent + indiv_relig_cent + com_relig_cent +
   identityhome_cent + identityhost_cent +
   strongties_cent*weakties_cent + groups + countries"

build_formula_weak <- function(m) {
  lower <- paste("weakties_cent:groups", paste0("weakties_cent:", m), paste0(m, ":groups"), sep = " + ")
  three <- paste0("weakties_cent:", m, ":groups")
  as.formula(paste("intermaryes ~", fixed_base, "+", lower, "+", three))
}

build_formula_strong <- function(m) {
  lower <- paste("strongties_cent:groups", paste0("strongties_cent:", m), paste0(m, ":groups"), sep = " + ")
  three <- paste0("strongties_cent:", m, ":groups")
  as.formula(paste("intermaryes ~", fixed_base, "+", lower, "+", three))
}

test_one_moderator <- function(m) {
  f_w <- build_formula_weak(m)
  fit_w <- glm(f_w, data = df_muslim, family = binomial())
  V_w <- sandwich::vcovCL(fit_w, cluster = ~ country_origin, type = "HC1")
  cn_w <- names(coef(fit_w))
  Lw   <- cn_w[grepl("weakties_cent", cn_w) & grepl(m, cn_w) & grepl("groups", cn_w)]
  lw   <- if(length(Lw)) car::linearHypothesis(fit_w, Lw, vcov. = V_w, test = "Chisq") else NULL
  
  f_s <- build_formula_strong(m)
  fit_s <- glm(f_s, data = df_muslim, family = binomial())
  V_s <- sandwich::vcovCL(fit_s, cluster = ~ country_origin, type = "HC1")
  cn_s <- names(coef(fit_s))
  Ls   <- cn_s[grepl("strongties_cent", cn_s) & grepl(m, cn_s) & grepl("groups", cn_s)]
  ls   <- if(length(Ls)) car::linearHypothesis(fit_s, Ls, vcov. = V_s, test = "Chisq") else NULL
  
  tibble(
    Moderator   = m,
    weak_chi2   = if(!is.null(lw)) as.numeric(lw$Chisq[2]) else NA_real_,
    weak_df     = if(!is.null(lw)) as.integer(lw$Df[2])    else NA_integer_,
    weak_p      = if(!is.null(lw)) as.numeric(lw$`Pr(>Chisq)`[2]) else NA_real_,
    strong_chi2 = if(!is.null(ls)) as.numeric(ls$Chisq[2]) else NA_real_,
    strong_df   = if(!is.null(ls)) as.integer(ls$Df[2])    else NA_integer_,
    strong_p    = if(!is.null(ls)) as.numeric(ls$`Pr(>Chisq)`[2]) else NA_real_
  )
}

raw_S1 <- map_df(mods_all, test_one_moderator)

stars <- function(p) ifelse(is.na(p), "",
                            ifelse(p < .001, "***",
                                   ifelse(p < .01,  "**",
                                          ifelse(p < .05,  "*",
                                                 ifelse(p < .10,  "†", "")))))

table_S1 <- raw_S1 %>%
  mutate(
    Weak = ifelse(is.na(weak_chi2), NA_character_,
                  sprintf("%.2f (%d)  p=%.3f %s", weak_chi2, weak_df, weak_p, stars(weak_p))),
    Strong = ifelse(is.na(strong_chi2), NA_character_,
                    sprintf("%.2f (%d)  p=%.3f %s", strong_chi2, strong_df, strong_p, stars(strong_p)))
  ) %>%
  transmute(
    Moderator,
    `Weak ties × mod × groups`   = Weak,
    `Strong ties × mod × groups` = Strong,
    Method = "GLM-robust"
  )

kable(
  table_S1,
  align = "lccc",
  caption = "Within-origin heterogeneity of tie effects (omnibus tests)."
)

mods_bin  <- c("man","married","onefivegen","secondgen","secondaryschool","tertiaryschool","employed")
mods_cont <- c("age_cent","langprof_cent","percdistance_cent",
               "subj_relig_cent","indiv_relig_cent","com_relig_cent",
               "identityhome_cent","identityhost_cent")
mods_all  <- c(mods_bin, mods_cont)

fixed_base <-
  "man + age_cent + married +
   onefivegen + secondgen + secondaryschool + tertiaryschool + employed +
   langprof_cent + percdistance_cent +
   subj_relig_cent + indiv_relig_cent + com_relig_cent +
   identityhome_cent + identityhost_cent +
   strongties_cent*weakties_cent + groups + countries"

build_formula_weak_cty <- function(m) {
  lower <- paste("weakties_cent:countries", paste0("weakties_cent:", m), paste0(m, ":countries"), sep = " + ")
  three <- paste0("weakties_cent:", m, ":countries")
  as.formula(paste("intermaryes ~", fixed_base, "+", lower, "+", three))
}

build_formula_strong_cty <- function(m) {
  lower <- paste("strongties_cent:countries", paste0("strongties_cent:", m), paste0(m, ":countries"), sep = " + ")
  three <- paste0("strongties_cent:", m, ":countries")
  as.formula(paste("intermaryes ~", fixed_base, "+", lower, "+", three))
}

test_one_moderator_cty <- function(m) {
  f_w <- build_formula_weak_cty(m)
  fit_w <- glm(f_w, data = df_muslim, family = binomial())
  V_w <- sandwich::vcovCL(fit_w, cluster = ~ country_origin, type = "HC1")
  cn_w <- names(coef(fit_w))
  Lw   <- cn_w[grepl("weakties_cent", cn_w) & grepl(m, cn_w) & grepl("countries", cn_w)]
  lw   <- if(length(Lw)) car::linearHypothesis(fit_w, Lw, vcov. = V_w, test = "Chisq") else NULL
  
  f_s <- build_formula_strong_cty(m)
  fit_s <- glm(f_s, data = df_muslim, family = binomial())
  V_s <- sandwich::vcovCL(fit_s, cluster = ~ country_origin, type = "HC1")
  cn_s <- names(coef(fit_s))
  Ls   <- cn_s[grepl("strongties_cent", cn_s) & grepl(m, cn_s) & grepl("countries", cn_s)]
  ls   <- if(length(Ls)) car::linearHypothesis(fit_s, Ls, vcov. = V_s, test = "Chisq") else NULL
  
  tibble(
    Moderator   = m,
    weak_chi2   = if(!is.null(lw)) as.numeric(lw$Chisq[2]) else NA_real_,
    weak_df     = if(!is.null(lw)) as.integer(lw$Df[2])    else NA_integer_,
    weak_p      = if(!is.null(lw)) as.numeric(lw$`Pr(>Chisq)`[2]) else NA_real_,
    strong_chi2 = if(!is.null(ls)) as.numeric(ls$Chisq[2]) else NA_real_,
    strong_df   = if(!is.null(ls)) as.integer(ls$Df[2])    else NA_integer_,
    strong_p    = if(!is.null(ls)) as.numeric(ls$`Pr(>Chisq)`[2]) else NA_real_
  )
}

raw_S3 <- map_df(mods_all, test_one_moderator_cty)

stars <- function(p) ifelse(is.na(p), "",
                            ifelse(p < .001, "***",
                                   ifelse(p < .01,  "**",
                                          ifelse(p < .05,  "*",
                                                 ifelse(p < .10,  "†", "")))))

table_S3 <- raw_S3 %>%
  mutate(
    Weak = ifelse(is.na(weak_chi2), NA_character_,
                  sprintf("%.2f (%d)  p=%.3f %s", weak_chi2, weak_df, weak_p, stars(weak_p))),
    Strong = ifelse(is.na(strong_chi2), NA_character_,
                    sprintf("%.2f (%d)  p=%.3f %s", strong_chi2, strong_df, strong_p, stars(strong_p)))
  ) %>%
  transmute(
    Moderator,
    `Weak ties × mod × countries`   = Weak,
    `Strong ties × mod × countries` = Strong,
    Method = "GLM-robust"
  )

kable(
  table_S3,
  align = "lccc",
  caption = "Within-country heterogeneity of tie effects (omnibus tests)."
)

table_both <-
  table_S1 %>%
  select(Moderator,
         `Weak ties × mod × groups`,
         `Strong ties × mod × groups`) %>%
  left_join(
    table_S3 %>%
      select(Moderator,
             `Weak ties × mod × countries`,
             `Strong ties × mod × countries`),
    by = "Moderator"
  ) %>%
  mutate(Moderator = factor(Moderator, levels = mods_all)) %>%
  arrange(Moderator) %>%
  mutate(Moderator = as.character(Moderator))

knitr::kable(
  table_both,
  align = "lcccc",
  caption = "Within-origin and within-country heterogeneity of tie effects (omnibus tests)."
)

kbl(table_both, align = "lcccc",
    caption = "Within-origin and within-country heterogeneity of tie effects (omnibus tests).") %>%
  add_header_above(c(" " = 1, "Groups" = 2, "Countries" = 2))

## 6.4. Within-Migrant-Group and Within-Host-Country Heterogeneity of Tie Effects (Simple Slopes)----
mods_bin  <- c("man","married","onefivegen","secondgen","secondaryschool","tertiaryschool","employed")
mods_cont <- c("age_cent","langprof_cent","percdistance_cent",
               "subj_relig_cent","indiv_relig_cent","com_relig_cent",
               "identityhome_cent","identityhost_cent")
mods_all  <- c(
  "man", "age_cent", "married", "onefivegen", "secondgen", "secondaryschool",
  "tertiaryschool", "employed", "langprof_cent", "percdistance_cent",
  "subj_relig_cent", "indiv_relig_cent", "com_relig_cent",
  "identityhome_cent", "identityhost_cent"
)

df_muslim <- df_muslim %>%
  mutate(
    across(all_of(c("groups", "countries", mods_bin)), factor)
  )

all_origins <- levels(df_muslim$groups)
all_countries <- levels(df_muslim$countries)

abbr_groups   <- c("Turkish"="TUR","Ex-Yugoslavian"="YUG","Moroccan"="MOR","Pakistani"="PAK")
abbr_countries<- c("Belgium"="BE", "Germany"="DE", "Switzerland"="CH",
                   "France"="FR", "United Kingdom"="GB", "The Netherlands"="NL")

fixed_base <-
  "man + age_cent + married +
   onefivegen + secondgen + secondaryschool + tertiaryschool + employed +
   langprof_cent + percdistance_cent +
   subj_relig_cent + indiv_relig_cent + com_relig_cent +
   identityhome_cent + identityhost_cent +
   strongties_cent*weakties_cent + groups + countries"

stars <- function(p) ifelse(is.na(p), "", ifelse(p<.001,"***", ifelse(p<.01,"**", ifelse(p<.05,"*", ifelse(p<.10,"†","")))))
is_binary_mod <- function(v){ is.factor(df_muslim[[v]]) }
ensure_cols <- function(df, cols){ for (c in cols) if (!c %in% names(df)) df[[c]] <- NA_character_; dplyr::select(df, Moderator, dplyr::all_of(cols)) }
fmt_or_ci <- function(or, lcl, ucl, p, prefix=""){ paste0(prefix, sprintf("%.2f%s", or, stars(p))) }

build_formula_g <- function(tie, m) { as.formula(paste("intermaryes ~", fixed_base, "+", paste(tie, m, "groups", sep="*"))) }
test_one_g <- function(m) {
  f_w <- build_formula_g("weakties_cent", m)
  fit_w <- glm(f_w, data = df_muslim, family = binomial())
  V_w <- sandwich::vcovCL(fit_w, cluster = ~ country_origin, type = "HC1")
  hyp_vars_w <- names(coef(fit_w))[grepl("weakties_cent:", names(coef(fit_w))) & grepl(m, names(coef(fit_w))) & grepl("groups", names(coef(fit_w)))]
  p_w <- if(length(hyp_vars_w) > 0) car::linearHypothesis(fit_w, hyp_vars_w, vcov. = V_w, test = "Chisq")$`Pr(>Chisq)`[2] else NA_real_
  
  f_s <- build_formula_g("strongties_cent", m)
  fit_s <- glm(f_s, data = df_muslim, family = binomial())
  V_s <- sandwich::vcovCL(fit_s, cluster = ~ country_origin, type = "HC1")
  hyp_vars_s <- names(coef(fit_s))[grepl("strongties_cent:", names(coef(fit_s))) & grepl(m, names(coef(fit_s))) & grepl("groups", names(coef(fit_s)))]
  p_s <- if(length(hyp_vars_s) > 0) car::linearHypothesis(fit_s, hyp_vars_s, vcov. = V_s, test = "Chisq")$`Pr(>Chisq)`[2] else NA_real_
  
  tibble(Moderator = m, weak_groups_p = p_w, strong_groups_p = p_s)
}

build_formula_c <- function(tie, m) { as.formula(paste("intermaryes ~", fixed_base, "+", paste(tie, m, "countries", sep="*"))) }
test_one_c <- function(m) {
  f_w <- build_formula_c("weakties_cent", m)
  fit_w <- glm(f_w, data = df_muslim, family = binomial())
  V_w <- sandwich::vcovCL(fit_w, cluster = ~ country_origin, type = "HC1")
  hyp_vars_w <- names(coef(fit_w))[grepl("weakties_cent:", names(coef(fit_w))) & grepl(m, names(coef(fit_w))) & grepl("countries", names(coef(fit_w)))]
  p_w <- if(length(hyp_vars_w) > 0) car::linearHypothesis(fit_w, hyp_vars_w, vcov. = V_w, test = "Chisq")$`Pr(>Chisq)`[2] else NA_real_
  
  f_s <- build_formula_c("strongties_cent", m)
  fit_s <- glm(f_s, data = df_muslim, family = binomial())
  V_s <- sandwich::vcovCL(fit_s, cluster = ~ country_origin, type = "HC1")
  hyp_vars_s <- names(coef(fit_s))[grepl("strongties_cent:", names(coef(fit_s))) & grepl(m, names(coef(fit_s))) & grepl("countries", names(coef(fit_s)))]
  p_s <- if(length(hyp_vars_s) > 0) car::linearHypothesis(fit_s, hyp_vars_s, vcov. = V_s, test = "Chisq")$`Pr(>Chisq)`[2] else NA_real_
  
  tibble(Moderator = m, weak_countries_p = p_w, strong_countries_p = p_s)
}

omni_g <- map_df(mods_all, test_one_g)
omni_c <- map_df(mods_all, test_one_c)

sig_thresh <- 0.10
gate <- omni_g %>%
  left_join(omni_c, by = "Moderator") %>%
  transmute(
    Moderator,
    show_weak_groups = !is.na(weak_groups_p) & weak_groups_p < sig_thresh,
    show_strong_groups = !is.na(strong_groups_p) & strong_groups_p < sig_thresh,
    show_weak_countries = !is.na(weak_countries_p) & weak_countries_p < sig_thresh,
    show_strong_countries = !is.na(strong_countries_p) & strong_countries_p < sig_thresh
  )

fit_tie_mod <- function(tie, moderator, domain = "groups") {
  fmla <- if (domain == "groups") build_formula_g(tie, moderator) else build_formula_c(tie, moderator)
  m <- glm(fmla, data = df_muslim, family = binomial())
  V <- sandwich::vcovCL(m, cluster = ~ country_origin, type = "HC1")
  list(model = m, vcov = V, tie = tie, moderator = moderator)
}

simple_slopes_cont_num <- function(fit, by) {
  spec_var <- if (by == "groups") "groups" else "countries"
  key <- if (by == "groups") "Origin" else "Country"
  mod <- fit$moderator; tie <- fit$tie
  sd_mod <- sd(df_muslim[[mod]], na.rm = TRUE); grid <- c(-sd_mod, 0, sd_mod)
  spec <- as.formula(paste("~", spec_var, "*", mod))
  sm <- emtrends(fit$model, specs = spec, var = tie, at = setNames(list(grid), mod), vcov. = fit$vcov) %>%
    summary(infer = TRUE) %>% as.data.frame()
  sm %>% transmute(!!key := .data[[spec_var]],
                   cond = c("-1 SD", "Mean", "+1 SD")[match(round(.data[[mod]], 4), round(grid, 4))],
                   est = .data[[paste0(tie, ".trend")]], p = p.value)
}

simple_slopes_bin_num <- function(fit, by) {
  spec_var <- if (by == "groups") "groups" else "countries"
  key <- if (by == "groups") "Origin" else "Country"
  mod <- fit$moderator; tie <- fit$tie
  spec <- as.formula(paste("~", spec_var, "*", mod))
  sm <- emtrends(fit$model, specs = spec, var = tie, vcov. = fit$vcov) %>%
    summary(infer = TRUE) %>% as.data.frame()
  sm %>% transmute(!!key := .data[[spec_var]], cond = as.character(.data[[mod]]),
                   est = .data[[paste0(tie, ".trend")]], p = p.value)
}

summarize_cont <- function(df_num, moderator_name, which_cols) {
  if (is.null(df_num) || nrow(df_num) == 0) return(ensure_cols(tibble(Moderator = moderator_name), which_cols))
  key <- names(df_num)[1]
  df_sig_groups <- df_num %>% group_by(!!sym(key)) %>% filter(any(p < sig_thresh, na.rm = TRUE)) %>% ungroup()
  if (nrow(df_sig_groups) == 0) return(ensure_cols(tibble(Moderator = moderator_name), which_cols))
  
  df_processed <- df_sig_groups %>%
    mutate(formatted_or = sprintf("%.2f%s", exp(est), stars(p))) %>%
    select(!!sym(key), cond, formatted_or) %>%
    mutate(cond = factor(cond, levels = c("-1 SD", "Mean", "+1 SD"))) %>%
    pivot_wider(names_from = cond, values_from = formatted_or, values_fill = NA_character_) %>%
    unite(col = "cell", c("-1 SD", "Mean", "+1 SD"), sep = "/", na.rm = FALSE) %>%
    mutate(cell = gsub("NA", " ", cell))
  
  wide <- pivot_wider(df_processed, names_from = !!sym(key), values_from = cell)
  tibble(Moderator = moderator_name) %>% bind_cols(wide) %>% ensure_cols(., which_cols)
}

summarize_bin <- function(df_num, moderator_name, which_cols) {
  if (is.null(df_num) || nrow(df_num) == 0) return(ensure_cols(tibble(Moderator = moderator_name), which_cols))
  df_sig <- df_num %>% filter(!is.na(p) & p < sig_thresh)
  if (nrow(df_sig) == 0) return(ensure_cols(tibble(Moderator = moderator_name), which_cols))
  key <- names(df_num)[1]
  
  df_pick <- df_sig %>%
    filter(cond == "1") %>% 
    mutate(OR = exp(est), cell = fmt_or_ci(OR, NA, NA, p)) %>%
    select(!!sym(key), cell)
  
  wide <- pivot_wider(df_pick, names_from = !!sym(key), values_from = cell)
  tibble(Moderator = moderator_name) %>% bind_cols(wide) %>% ensure_cols(., which_cols)
}

build_block <- function(tie, domain) {
  which_cols <- if (domain == "groups") all_origins else all_countries
  
  bind_rows(lapply(mods_all, function(m) {
    gate_col <- paste0("show_", sub("ties_cent", "", tie), "_", domain)
    if (!gate[[gate_col]][gate$Moderator == m]) {
      return(ensure_cols(tibble(Moderator = m), which_cols))
    }
    
    fit <- fit_tie_mod(tie, m, domain)
    if (is_binary_mod(m)) {
      num <- simple_slopes_bin_num(fit, by = domain)
      summarize_bin(num, m, which_cols)
    } else {
      num <- simple_slopes_cont_num(fit, by = domain)
      summarize_cont(num, m, which_cols)
    }
  }))
}

format_moderator_names <- function(df) {
  df %>% mutate(Moderator = case_when(
    Moderator == "onefivegen" ~ "1.5-generation",
    Moderator == "secondgen" ~ "2nd-generation",
    Moderator == "secondaryschool" ~ "Secondary school",
    Moderator == "tertiaryschool" ~ "Tertiary school",
    Moderator == "langprof_cent" ~ "Language proficiency",
    Moderator == "percdistance_cent" ~ "Perceived distance",
    Moderator == "subj_relig_cent" ~ "Subjective religiosity",
    Moderator == "indiv_relig_cent" ~ "Individual religiosity",
    Moderator == "com_relig_cent" ~ "Communal religiosity",
    Moderator == "identityhome_cent" ~ "Home identity",
    Moderator == "identityhost_cent" ~ "Host identity",
    Moderator == "age_cent" ~ "Age",
    TRUE ~ str_to_title(Moderator)
  ))
}

weak_groups   <- build_block("weakties_cent", "groups")
strong_groups <- build_block("strongties_cent", "groups")

table_GROUPS <- weak_groups %>%
  left_join(strong_groups, by = "Moderator") %>%
  mutate(Moderator = factor(Moderator, levels = mods_all)) %>%
  arrange(Moderator) %>%
  format_moderator_names()

names(table_GROUPS) <- c("Moderator", rep(abbr_groups, 2))

print(
  kbl(table_GROUPS,
      align = "l",
      caption = "Within-Migrant-Group Heterogeneity of Tie Effects (Simple Slopes)") %>%
    add_header_above(c(" " = 1,
                       "Migrant Groups–Weak Ties"   = length(all_origins),
                       "Migrant Groups–Strong Ties" = length(all_origins))) %>%
    footnote(general = paste(
      "Cells are OR. † p<.10, * p<.05, ** p<.01, *** p<.001.",
      "Continuous moderators show ORs at -1 SD / Mean / +1 SD."
    ), general_title = "Notes: ")
)

weak_countries   <- build_block("weakties_cent", "countries")
strong_countries <- build_block("strongties_cent", "countries")

table_COUNTRIES <- weak_countries %>%
  left_join(strong_countries, by = "Moderator") %>%
  mutate(Moderator = factor(Moderator, levels = mods_all)) %>%
  arrange(Moderator) %>%
  format_moderator_names()

names(table_COUNTRIES) <- c("Moderator", rep(abbr_countries, 2))

print(
  kbl(table_COUNTRIES,
      align = "l",
      caption = "Within-Host-Country Heterogeneity of Tie Effects (Simple Slopes)") %>%
    add_header_above(c(" " = 1,
                       "Host Countries–Weak Ties"   = length(all_countries),
                       "Host Countries–Strong Ties" = length(all_countries))) %>%
    footnote(general = paste(
      "Cells are OR. † p<.10, * p<.05, ** p<.01, *** p<.001.",
      "Continuous moderators show ORs at -1 SD / Mean / +1 SD."
    ), general_title = "Notes: ")
)